Experiment Setup

All data were pre-processed using minion to identify and check adapters, reaper to trim adapter sequences followed by tally to deduplicate reads while maintaining depth information. Subsequent to this all reads passed through the mirmod pipeline against all miRBase (Release 21) precursor sequences for Mouse and Zebrafish. Reads were summed across paired end sequences for the same read pair. Finally reads are loaded into R for final analysis.

This is the sample description file used for the analyses below.

Name FileName Species SampleName QC Genotype
A543S1 A543S1.R1.fastq.gz mouse NREPc1WT0808 qc_all wt
A543S2 A543S2.R1.fastq.gz mouse NREPc8ho0808 qc_all scrambled
A543S3 A543S3.R1.fastq.gz mouse NREPc1WT1605 qc_all wt
A543S4 A543S4.R1.fastq.gz mouse NREPc12WT1605 qc_all wt
A543S5 A543S5.R1.fastq.gz mouse NREPc8ho1605 qc_all scrambled
A543S6 A543S6.R1.fastq.gz mouse NREPc29ho1605 qc_all scrambled
A543S7 A543S7.R1.fastq.gz zebrafish wt1zf qc_all wt
A543S8 A543S8.R1.fastq.gz zebrafish wt2zf qc_all wt
A543S9 A543S9.R1.fastq.gz zebrafish consdelcyrzf1 qc_all del
A543S10 A543S10.R1.fastq.gz zebrafish consdelcyrzf2 qc_all del

Preparation

We first load the R/BioConductor libraries that we need.

library(RColorBrewer)
library(gplots)
library(DESeq2)
library(reshape2)
library(ggplot2)
hmcol = colorRampPalette(brewer.pal(9, "GnBu"))(100)
spectral <- colorRampPalette(rev(brewer.pal(11, "Spectral")), space="Lab")(100)

Mouse Analysis

Count Loading

We can now load all the count data

setwd("/Users/aje/anton_r_notebook/alena_mirna_oct_2016/")
mircounts <- read.table("mouse_counts3.txt",header=TRUE,row.names=1)
mircounts=mircounts[-nrow(mircounts),]

As well as the pdata, which contains information on each sample. We will subset for one species at a time, now Mouse

pdata <- read.table("pdata.txt",header=TRUE,row.names=1,sep="\t")
pdata=pdata[pdata$Species=="mouse",]

colnames(mircounts)=rownames(pdata)

conds=as.factor(as.character(pdata$Genotype))

# Filter S1 & S2 ??
#mircounts=mircounts[,c(3:6)]
#pdata=pdata[c(3:6),]
#conds=conds[3:6]

Count Preparation & Normalisation

We are now ready to create a DESeq object from the counts table.

#Lets Load the Counts First
coldata = as.data.frame(t(t(conds)))
rownames(coldata)=colnames(mircounts)
colnames(coldata)='treatment'
dds <- DESeqDataSetFromMatrix(countData = mircounts, colData = coldata, design = ~ treatment)

We are ready to normalise the data, but first we should look at the number of sequenced reads per sample.

cond_colours = c("#E41A1C","#377EB8")[as.factor(conds)]
names(cond_colours)=conds

group_colours = brewer.pal(length(rownames(pdata)),"Accent")[as.factor(rownames(pdata))]
names(group_colours)=rownames(pdata)

quartz()
barplot(apply(mircounts,2,sum), las=2,col=cond_colours,main="Pre Normalised Counts",cex.names=0.4)
legend("topright",levels((conds)),cex=0.6,fill=cond_colours[levels(conds)])

We will also estimate the negative binomial dispersion of the data.

dds <- estimateSizeFactors(dds)
dds <- estimateDispersions(dds)
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
quartz()
plotDispEsts(dds)

Post Normalisation QC

Now we can normalise and plot the counts again.

normcounts <- counts(dds, normalized=TRUE)
rawcounts=counts(dds,normalized=FALSE)
log2counts=log2(normcounts+1)


quartz()
barplot(apply(normcounts,2,sum), las=2,col=cond_colours,main="Post-Normalised Counts",cex.names=0.4)
legend("topright",levels((conds)),cex=0.6,fill=cond_colours[levels(conds)])

We will apply the Variance Stabilising Transformation (VST) it’s better than log2 for counts.

vsd <- varianceStabilizingTransformation(dds)
vstcounts <- assay(vsd)
vstcounts <- vstcounts[order(apply(vstcounts,1,sum),decreasing =TRUE),]

As an additional QC step we can calculate the sample-to-sample Pearson correlations and plot them in a heatmap.

quartz()
heatmap.2(cor(rawcounts),trace="none",col=hmcol,main="Sample to Sample Correlation (Raw Counts)",cexRow=0.5,cexCol=0.5,RowSideColors=cond_colours, margins=c(9,7))

quartz()
heatmap.2(cor(vstcounts),trace="none",col=hmcol,main="Sample to Sample Correlation (VST)",cexRow=0.5,cexCol=0.5,RowSideColors=cond_colours,margins=c(9,7))

We can also perform PCA.

pca <- princomp(vstcounts)


quartz()
par(mfrow=c(2,1))
plot(pca$loadings, col=cond_colours,  pch=19, cex=2, main="Sample to Sample PCA (VST)")
text(pca$loadings, as.vector(colnames(mircounts)), pos=3, cex=0.4)
legend("topright",levels(conds),fill=cond_colours[levels(conds)],cex=0.4)

PCA of the top 3 Principal Components.

pca2=prcomp(t(vstcounts),center=TRUE)

quartz()
par(mfrow=c(1,3))
plot(pca2$x, col=cond_colours,  pch=19, cex=2, main="Sample to Sample PCA (VST)")
text(pca2$x, as.vector(colnames(mircounts)), pos=3, cex=0.4)
plot(pca2$x[,1],pca2$x[,3], col=cond_colours,  pch=19, cex=2, main="Sample to Sample PCA (VST)",ylab="PC3",xlab="PC1")
text(pca2$x[,1],pca2$x[,3], as.vector(colnames(mircounts)), pos=3, cex=0.4)
plot(pca2$x[,2],pca2$x[,3], col=cond_colours,  pch=19, cex=2, main="Sample to Sample PCA (VST)",ylab="PC3",xlab="PC2")
text(pca2$x[,2],pca2$x[,3], as.vector(colnames(mircounts)), pos=3, cex=0.4)

Initial Biological Analysis of the data

Here are the top10 microRNAs.

top10=apply(mircounts,1,sum)[1:10]
top10[11]=sum(apply(mircounts,1,sum)[11:nrow(mircounts)])
names(top10)[11]="other"
pie(top10,col=brewer.pal(11,"Set3"),main="Top10 microRNAs")

This is the expression of the top10 microRNAs sample to sample.

heatmap.2(vstcounts[names(top10)[1:10],],col=hmcol,trace="none",cexCol=0.4,cexRow=0.6,ColSideColors=cond_colours)

This is the expression of the miR-29 families of microRNAs sample to sample.

heatmap.2(vstcounts[rownames(mircounts)[grep("mir-29[a-z]",rownames(mircounts))],],col=hmcol,trace="none",cexCol=0.4,cexRow=0.6,ColSideColors=cond_colours)

quartz()
barplot(t(vstcounts[rownames(mircounts)[grep("mir-29[a-z]",rownames(mircounts))],]),beside=T,las=2,cex.names=0.5,col=cond_colours,main="miR-29 levels (VST)")
legend("topright",rownames(pdata),fill=cond_colours,cex=0.4)

Statistical Analysis

Run the statistical contrast on the count data

p_threshold=0.05
lfc_threshold=0.75

cds <- nbinomWaldTest(dds)

res=results(cds,contrast=c("treatment","wt","scrambled"))
res <- res[order(res$padj),]
res
## log2 fold change (MAP): treatment wt vs scrambled 
## Wald test p-value: treatment wt vs scrambled 
## DataFrame with 1513 rows and 6 columns
##                   baseMean log2FoldChange     lfcSE       stat
##                  <numeric>      <numeric> <numeric>  <numeric>
## mmu-mir-196b-5p   138.1979       2.923347 0.3645313   8.019466
## mmu-mir-218-1-5p 6357.5799       1.586746 0.2740549   5.789882
## mmu-mir-150-5p    141.2804      -1.849097 0.3307271  -5.591004
## mmu-mir-139-5p    240.8153      -1.555849 0.2846775  -5.465304
## mmu-mir-10b-5p   4030.2773       2.055345 0.3886272   5.288731
## ...                    ...            ...       ...        ...
## mmu-mir-682-5p   0.6753636    -0.06668301 0.3730918 -0.1787308
## mmu-mir-669k-3p  0.4527021     0.05247907 0.3193682  0.1643216
## mmu-mir-208b-5p  0.2028985    -0.07803852 0.2393486 -0.3260454
## mmu-mir-7017-3p  0.4057970    -0.13881524 0.2429540 -0.5713644
## mmu-mir-7220-5p  0.1134521     0.03479149 0.2393486  0.1453591
##                        pvalue         padj
##                     <numeric>    <numeric>
## mmu-mir-196b-5p  1.062058e-15 6.722826e-13
## mmu-mir-218-1-5p 7.043605e-09 2.229301e-06
## mmu-mir-150-5p   2.257602e-08 4.763540e-06
## mmu-mir-139-5p   4.621129e-08 7.312936e-06
## mmu-mir-10b-5p   1.231676e-07 1.559302e-05
## ...                       ...          ...
## mmu-mir-682-5p      0.8581491           NA
## mmu-mir-669k-3p     0.8694780           NA
## mmu-mir-208b-5p     0.7443900           NA
## mmu-mir-7017-3p     0.5677527           NA
## mmu-mir-7220-5p     0.8844274           NA
sig = rownames(res[(abs(res$log2FoldChange) > lfc_threshold) & (res$padj < p_threshold) & !is.na(res$padj),])

Volcanoplots of Significant Hits

plot(res$log2FoldChange,-log(res$padj,10),ylab="-log10(Adjusted P)",xlab="Log2 FoldChange",main=paste("Volcano Plot","WT v Scr\nmir-29 in green\nsig. in red"),pch=19,cex=0.4)      
if (length(sig)>=1){
text(res[sig,]$log2FoldChange,-log(res[sig,]$padj,10),labels=rownames(res[rownames(mircounts)[grep("mir-29[a-z]",rownames(mircounts))],]),pos=3,cex=0.6)
points(res[sig,"log2FoldChange"],-log(res[sig,"padj"],10),pch=19,cex=0.4,col="red")
points(res[rownames(mircounts)[grep("mir-29[a-z]",rownames(mircounts))],"log2FoldChange"],-log(res[rownames(mircounts)[grep("mir-29[a-z]",rownames(mircounts))],"padj"],10),pch=19,cex=0.6,col="green")

}
abline(h=-log10(p_threshold),lty=3)
abline(v=-lfc_threshold,lty=3)
abline(v=lfc_threshold,lty=3)   

plot(apply(vstcounts[,grep("wt",conds)],1,median),apply(vstcounts[,grep("scrambled",conds)],1,median),col="darkblue",cex=0.4,pch=19)
if (length(sig)>1){
points(apply(vstcounts[sig,grep("wt",conds)],1,median),apply(vstcounts[sig,grep("scrambled",conds)],1,median),col="red")
points(apply(vstcounts[rownames(vstcounts)[grep("mir-29[a-z]",rownames(vstcounts))],grep("wt",conds)],1,median),apply(vstcounts[rownames(vstcounts)[grep("mir-29[a-z]",rownames(vstcounts))],grep("scrambled",conds)],1,median),col="green",cex=1.2)
text(apply(vstcounts[sig,grep("wt",conds)],1,median),apply(vstcounts[sig,grep("scrambled",conds)],1,median),labels=sig,cex=0.4,pos=1)
text(apply(vstcounts[rownames(vstcounts)[grep("mir-29[a-z]",rownames(vstcounts))],grep("wt",conds)],1,median),apply(vstcounts[rownames(vstcounts)[grep("mir-29[a-z]",rownames(vstcounts))],grep("scrambled",conds)],1,median),labels=rownames(vstcounts)[grep("mir-29[a-z]",rownames(vstcounts))],cex=0.4,pos=1,col="darkgreen")
}
abline(a=0,b=1,lty=2,col="red")

Heatmap of significant hits.

heatmap.2(vstcounts[sig,],trace="none",ColSideColors = cond_colours,col=hmcol,margins=c(5,5),cexRow=0.5,cexCol=0.6,labCol=paste(rownames(pdata),pdata$SampleName,sep="\n"),main="Significant Hits Heatmap (VST)")

Result output to text file

Let’s output the final results table with normalised expression values and stats listed

write.table(cbind(as.matrix(counts(dds,normalized=T)[rownames(res),]),as.matrix(res)),"mouse_results.txt",quote=F,sep="\t")

Length Analysis

Now we load in the length data from the mapping analysis separately to analyse. We will analyse the top 10 expressed, top 10 differential miRs and the miR-29 family, (Excluding miRs with norm counts sum < 50)

length_mouse=read.table("length_tables_mouse.txt",sep="\t",header=FALSE)
length_mouse$genotype=gsub("\\d+.lane.clean.processed.fa.gz","",gsub("mouse_","",length_mouse$V2))

mirlist=unique(c(rownames(counts(dds,normalized=T)[1:10,]),rownames(res[1:15,]),rownames(mircounts)[grep("mir-29[a-z]",rownames(mircounts))]))

for (i in 1:length(mirlist)){
mir=mirlist[i]

if (median(normcounts[mirlist[i],]) >= 50){
length_table=as.matrix(length_mouse[length_mouse$V1==mir,4:34])/apply(as.matrix(length_mouse[length_mouse$V1==mir,4:34]),1,max)
rownames(length_table)=length_mouse[length_mouse$V1==mir,"genotype"]

length_table=length_table[order(rownames(length_table)),]

colours = c("#E41A1C","#377EB8")[as.factor(rownames(length_table))]
names(colours)=as.factor(rownames(length_table))

heatmap.2(length_table,col=spectral,trace="none",Rowv=F,Colv=F,dendrogram="none",labCol=paste(c(0:30),"nt"),main=paste("Length Table\n",mir),RowSideColors=colours)

barplot(length_table,beside=T,col=colours,names=paste(c(0:30),"nt"),las=2)

matplot(t(length_table),type="b",col=colours,pch=19,cex=0.4,lty=1,lwd=0.4,main=paste("Length Analysis:",mir),xlab=paste(c(0:30),"nt"),las=2,xaxt="n")
axis(1, at = 1:31, labels = paste(c(0:30),"nt"), cex.axis = 0.7,las=2)
legend("topright",levels(as.factor(rownames(length_table))),fill=colours[levels(as.factor(rownames(length_table)))])
}
}

Zebrafish Analysis

Count Loading

We can now load all the count data

setwd("/Users/aje/anton_r_notebook/alena_mirna_oct_2016/")
mircounts <- read.table("zebrafish_counts3.txt",header=TRUE,row.names=1)
mircounts=mircounts[-nrow(mircounts),]

As well as the pdata, which contains information on each sample. We will subset for one species at a time, now Zebrafish.

pdata <- read.table("pdata.txt",header=TRUE,row.names=1,sep="\t")
pdata=pdata[pdata$Species=="zebrafish",]

colnames(mircounts)=rownames(pdata)

conds=as.factor(as.character(pdata$Genotype))

Count Preparation & Normalisation

We are now ready to create a DESeq object from the counts table.

#Lets Load the Counts First
coldata = as.data.frame(t(t(conds)))
rownames(coldata)=colnames(mircounts)
colnames(coldata)='treatment'
dds <- DESeqDataSetFromMatrix(countData = mircounts, colData = coldata, design = ~ treatment)

We are ready to normalise the data, but first we should look at the number of sequenced reads per sample.

cond_colours = c("#E41A1C","#377EB8")[as.factor(conds)]
names(cond_colours)=conds

group_colours = brewer.pal(length(rownames(pdata)),"Accent")[as.factor(rownames(pdata))]
names(group_colours)=rownames(pdata)

quartz()
barplot(apply(mircounts,2,sum), las=2,col=cond_colours,main="Pre Normalised Counts",cex.names=0.4)
legend("topright",levels((conds)),cex=0.6,fill=cond_colours[levels(conds)])

We will also estimate the negative binomial dispersion of the data.

dds <- estimateSizeFactors(dds)
dds <- estimateDispersions(dds)
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
quartz()
plotDispEsts(dds)

Post Normalisation QC

Now we can normalise and plot the counts again.

normcounts <- counts(dds, normalized=TRUE)
rawcounts=counts(dds,normalized=FALSE)
log2counts=log2(normcounts+1)


quartz()
barplot(apply(normcounts,2,sum), las=2,col=cond_colours,main="Post-Normalised Counts",cex.names=0.4)
legend("topright",levels((conds)),cex=0.6,fill=cond_colours[levels(conds)])

We will apply the Variance Stabilising Transformation (VST) it’s better than log2 for counts.

vsd <- varianceStabilizingTransformation(dds)
vstcounts <- assay(vsd)
vstcounts <- vstcounts[order(apply(vstcounts,1,sum),decreasing =TRUE),]

As an additional QC step we can calculate the sample-to-sample Pearson correlations and plot them in a heatmap.

quartz()
heatmap.2(cor(rawcounts),trace="none",col=hmcol,main="Sample to Sample Correlation (Raw Counts)",cexRow=0.5,cexCol=0.5,RowSideColors=cond_colours, margins=c(9,7))

quartz()
heatmap.2(cor(vstcounts),trace="none",col=hmcol,main="Sample to Sample Correlation (VST)",cexRow=0.5,cexCol=0.5,RowSideColors=cond_colours,margins=c(9,7))

We can also perform PCA.

pca <- princomp(vstcounts)


quartz()
par(mfrow=c(2,1))
plot(pca$loadings, col=cond_colours,  pch=19, cex=2, main="Sample to Sample PCA (VST)")
text(pca$loadings, as.vector(colnames(mircounts)), pos=3, cex=0.4)
legend("topright",levels(conds),fill=cond_colours[levels(conds)],cex=0.4)

PCA of the top 3 Principal Components.

pca2=prcomp(t(vstcounts),center=TRUE)

quartz()
par(mfrow=c(1,3))
plot(pca2$x, col=cond_colours,  pch=19, cex=2, main="Sample to Sample PCA (VST)")
text(pca2$x, as.vector(colnames(mircounts)), pos=3, cex=0.4)
plot(pca2$x[,1],pca2$x[,3], col=cond_colours,  pch=19, cex=2, main="Sample to Sample PCA (VST)",ylab="PC3",xlab="PC1")
text(pca2$x[,1],pca2$x[,3], as.vector(colnames(mircounts)), pos=3, cex=0.4)
plot(pca2$x[,2],pca2$x[,3], col=cond_colours,  pch=19, cex=2, main="Sample to Sample PCA (VST)",ylab="PC3",xlab="PC2")
text(pca2$x[,2],pca2$x[,3], as.vector(colnames(mircounts)), pos=3, cex=0.4)

par(mfrow=c(1,1))

Initial Biological Analysis of the data

Here are the top10 microRNAs.

top10=apply(mircounts,1,sum)[1:10]
top10[11]=sum(apply(mircounts,1,sum)[11:nrow(mircounts)])
names(top10)[11]="other"
pie(top10,col=brewer.pal(11,"Set3"),main="Top10 microRNAs")

This is the expression of the top10 microRNAs sample to sample.

heatmap.2(vstcounts[names(top10)[1:10],],col=hmcol,trace="none",cexCol=0.4,cexRow=0.6,ColSideColors=cond_colours)

This is the expression of the miR-29 families of microRNAs sample to sample.

heatmap.2(vstcounts[rownames(mircounts)[grep("mir-7[a-z]",rownames(mircounts))],],col=hmcol,trace="none",cexCol=0.4,cexRow=0.6,ColSideColors=cond_colours)

quartz()
barplot(t(vstcounts[rownames(mircounts)[grep("mir-7[a-z]",rownames(mircounts))],]),beside=T,las=2,cex.names=0.5,col=cond_colours,main="miR-7 levels (VST)")
legend("topright",rownames(pdata),fill=cond_colours,cex=0.4)

Statistical Analysis

Run the statistical contrast on the count data

p_threshold=0.05
lfc_threshold=0.75

cds <- nbinomWaldTest(dds)

res=results(cds,contrast=c("treatment","wt","del"))
res <- res[order(res$padj),]
res
## log2 fold change (MAP): treatment wt vs del 
## Wald test p-value: treatment wt vs del 
## DataFrame with 545 rows and 6 columns
##                      baseMean log2FoldChange      lfcSE        stat
##                     <numeric>      <numeric>  <numeric>   <numeric>
## dre-let-7e-5p       1258.2221      1.1339745  0.1836661    6.174110
## dre-mir-2189-3p     4228.4625      1.1794140  0.2186925    5.393024
## dre-mir-430a-8-5p   2725.7388     -0.9606233  0.2093750   -4.588052
## dre-mir-738-5p       171.4140      0.9621898  0.2263565    4.250772
## dre-let-7c-2-3p      173.1958      0.7843486  0.2163074    3.626083
## ...                       ...            ...        ...         ...
## dre-mir-430b-19-3p 0.09529112   0.0005011098 0.04309577  0.01162782
## dre-mir-124-3-3p   0.48459106  -0.0186131929 0.05350431 -0.34788210
## dre-mir-193a-2-3p  0.09529112   0.0005011098 0.04309577  0.01162782
## dre-mir-124-4-3p   0.46846364  -0.0182240783 0.05293562 -0.34426873
## dre-mir-27e-5p     0.76247246  -0.0591662887 0.06270408 -0.94357963
##                          pvalue         padj
##                       <numeric>    <numeric>
## dre-let-7e-5p      6.653722e-10 3.626279e-07
## dre-mir-2189-3p    6.928177e-08 1.887928e-05
## dre-mir-430a-8-5p  4.474018e-06 8.127799e-04
## dre-mir-738-5p     2.130352e-05 2.902605e-03
## dre-let-7c-2-3p    2.877529e-04 3.136507e-02
## ...                         ...          ...
## dre-mir-430b-19-3p    0.9907226    0.9969942
## dre-mir-124-3-3p      0.7279287    0.9969942
## dre-mir-193a-2-3p     0.9907226    0.9969942
## dre-mir-124-4-3p      0.7306442    0.9969942
## dre-mir-27e-5p        0.3453845    0.9969942
sig = rownames(res[(abs(res$log2FoldChange) > lfc_threshold) & (res$padj < p_threshold) & !is.na(res$padj),])

Volcanoplots of Significant Hits

plot(res$log2FoldChange,-log(res$padj,10),ylab="-log10(Adjusted P)",xlab="Log2 FoldChange",main=paste("Volcano Plot","WT v Del\nmir-7 in green\nsig. in red"),pch=19,cex=0.4)      
if (length(sig)>=1){
text(res[sig,]$log2FoldChange,-log(res[sig,]$padj,10),labels=rownames(res[rownames(mircounts)[grep("mir-7[a-z]",rownames(mircounts))],]),pos=3,cex=0.6)
points(res[sig,"log2FoldChange"],-log(res[sig,"padj"],10),pch=19,cex=0.4,col="red")
points(res[rownames(mircounts)[grep("mir-7[a-z]",rownames(mircounts))],"log2FoldChange"],-log(res[rownames(mircounts)[grep("mir-7[a-z]",rownames(mircounts))],"padj"],10),pch=19,cex=0.6,col="green")

}
abline(h=-log10(p_threshold),lty=3)
abline(v=-lfc_threshold,lty=3)
abline(v=lfc_threshold,lty=3)   

plot(apply(vstcounts[,grep("wt",conds)],1,median),apply(vstcounts[,grep("del",conds)],1,median),col="darkblue",cex=0.4,pch=19)
if (length(sig)>1){
points(apply(vstcounts[sig,grep("wt",conds)],1,median),apply(vstcounts[sig,grep("del",conds)],1,median),col="red")
points(apply(vstcounts[rownames(vstcounts)[grep("mir-7[a-z]",rownames(vstcounts))],grep("wt",conds)],1,median),apply(vstcounts[rownames(vstcounts)[grep("mir-7[a-z]",rownames(vstcounts))],grep("del",conds)],1,median),col="green",cex=1.2)
text(apply(vstcounts[sig,grep("wt",conds)],1,median),apply(vstcounts[sig,grep("del",conds)],1,median),labels=sig,cex=0.4,pos=1)
text(apply(vstcounts[rownames(vstcounts)[grep("mir-7[a-z]",rownames(vstcounts))],grep("wt",conds)],1,median),apply(vstcounts[rownames(vstcounts)[grep("mir-7[a-z]",rownames(vstcounts))],grep("del",conds)],1,median),labels=rownames(vstcounts)[grep("mir-7[a-z]",rownames(vstcounts))],cex=0.4,pos=1,col="darkgreen")
}
abline(a=0,b=1,lty=2,col="red")

Heatmap of significant hits

heatmap.2(vstcounts[sig,],trace="none",ColSideColors = cond_colours,col=hmcol,margins=c(5,5),cexRow=0.5,cexCol=0.6,labCol=paste(rownames(pdata),pdata$SampleName,sep="\n"),main="Significant Hits Heatmap (VST)")

Result output to text file

Let’s output the final results table with normalised expression values and stats listed

write.table(cbind(as.matrix(counts(dds,normalized=T)[rownames(res),]),as.matrix(res)),"zebrafish_results.txt",quote=F,sep="\t")

Length Analysis

Now we load in the length data from the mapping analysis separately to analyse. We will analyse the top 10 expressed, top 10 differential miRs and the miR-7 family, (Excluding miRs with norm counts sum <50)

length_zebrafish=read.table("length_tables_zebrafish.txt",sep="\t",header=FALSE)
length_zebrafish$genotype=gsub("\\d+.lane.clean.processed.fa.gz","",gsub("zfish_","",length_zebrafish$V2))

mirlist=unique(c(rownames(counts(dds,normalized=T)[1:10,]),rownames(res[1:15,]),rownames(mircounts)[grep("mir-7[a-z]",rownames(mircounts))]))

for (i in 1:length(mirlist)){
mir=mirlist[i]

if (median(normcounts[mirlist[i],]) >= 50){

length_table=as.matrix(length_zebrafish[length_zebrafish$V1==mir,4:34])/apply(as.matrix(length_zebrafish[length_zebrafish$V1==mir,4:34]),1,max)
rownames(length_table)=length_zebrafish[length_zebrafish$V1==mir,"genotype"]

length_table=length_table[order(rownames(length_table)),]

colours = c("#E41A1C","#377EB8")[as.factor(rownames(length_table))]
names(colours)=as.factor(rownames(length_table))

heatmap.2(length_table,col=spectral,trace="none",Rowv=F,Colv=F,dendrogram="none",labCol=paste(c(0:30),"nt"),main=paste("Length Table\n",mir),RowSideColors=colours)

barplot(length_table,beside=T,col=colours,names=paste(c(0:30),"nt"),las=2)

matplot(t(length_table),type="b",col=colours,pch=19,cex=0.4,lty=1,lwd=0.4,main=paste("Length Analysis:",mir),xlab=paste(c(0:30),"nt"),las=2,xaxt="n")
axis(1, at = 1:31, labels = paste(c(0:30),"nt"), cex.axis = 0.7,las=2)
legend("topright",levels(as.factor(rownames(length_table))),fill=colours[levels(as.factor(rownames(length_table)))])
}
}